package top.jacktgq.sxt.bio.demo2;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;

/**
 * Web crawler basics, demo 02: downloads a page over HTTP with an explicit
 * {@code User-Agent} header and prints the response body to standard output.
 *
 * @Author CandyWall
 * @Date 2019/10/20--18:16
 * @Description Principles of a web crawler, part 02
 */
public class SpiderTest02 {
    /**
     * Page to download. Another site (e.g. "https://www.jd.com") can be substituted here.
     * Original author's note: crawling Dianping results in an HTTP 403 error (access refused).
     */
    private static final String TARGET_URL = "https://www.dianping.com";

    /**
     * Browser-like User-Agent sent with the request.
     * NOTE(review): presumably this is what avoids the 403 mentioned above - confirm.
     */
    private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36";

    /** Connect/read timeout; without one a stalled server would block the program forever. */
    private static final int TIMEOUT_MILLIS = 10000;

    /**
     * Fetches {@link #TARGET_URL} with a GET request and prints every line of the
     * UTF-8 decoded response body to {@code System.out}.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the URL is malformed, the connection fails or times out,
     *                   or the response body cannot be read
     */
    public static void main(String[] args) throws Exception {
        // Build the URL
        URL url = new URL(TARGET_URL);

        // Download the resource
        HttpURLConnection urlConnection = (HttpURLConnection) url.openConnection();
        try {
            urlConnection.setRequestMethod("GET");
            urlConnection.setRequestProperty("User-Agent", USER_AGENT);
            urlConnection.setConnectTimeout(TIMEOUT_MILLIS);
            urlConnection.setReadTimeout(TIMEOUT_MILLIS);

            // try-with-resources closes the reader (and the underlying stream)
            // even when reading fails part-way through.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(urlConnection.getInputStream(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    System.out.println(line);
                }
            }
        } finally {
            // Release the connection regardless of success or failure.
            urlConnection.disconnect();
        }

        // Analyze (not implemented yet)

        // Process (not implemented yet)
    }
}
